Eggcellent_Data_Analysis¶

Author: Justin Garza

Date: See below

Description:
A look at Egg Data over time

In [1]:
from datetime import datetime
from IPython.display import display
from IPython.display import Markdown as MD
# Read the clock once so the date and version stamps always describe the same
# instant; two separate now() calls could land on either side of a minute or
# midnight boundary and disagree.
run_started = datetime.now()
current_date = run_started.strftime('%Y-%m-%d')
version = run_started.strftime('%Y%m%d.%H%M')

# Render both stamps as bold-labelled Markdown in the cell output.
display(MD(f"**Date:** {current_date}"))
display(MD(f"**version:** {version}"))

Date: 2025-02-18

version: 20250218.2148

Set Up¶

In [2]:
# This cell imports everything needed for this notebook

import os
import re
import math

import numpy as np
import pandas as pd

# for the notebook rendering 
from IPython.display import display, HTML
from IPython.display import Markdown as MD

# Graphs and Charts
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# pandas display options: no row/column truncation, very wide output,
# long cell text, and floats printed with three decimal places.
for option_name, option_value in (
    ("display.max_rows", None),
    ("display.max_columns", None),
    ('display.width', 9000),
    ('max_colwidth', 400),
    ('display.float_format', '{:.3f}'.format),
):
    pd.set_option(option_name, option_value)

# Shared colormap: the 'Spectral_r' palette requested as a colormap object
# (as_cmap=True) rather than a list of discrete colors.
heatmapCM = sns.color_palette(
    palette='Spectral_r',
    as_cmap=True,
)


## directories
# All paths are resolved relative to the kernel's current working directory.
DIR = os.getcwd()
print(f'{DIR=}')

DataDIR = os.path.join(DIR,'data')   # input CSVs are read from here
OutDIR = os.path.join(DIR,'docs')    # output folder, created on demand below

# The input data cannot be recreated here, so a missing folder is only reported;
# the cells that read from DataDIR will fail if it is really absent.
if not os.path.exists(DataDIR):
    print('***DATA FOLDER IS MISSING***')

# exist_ok=True creates the folder only when needed, without a separate
# check-then-create step that could race with another process.
os.makedirs(OutDIR, exist_ok=True)
DIR='C:\\Users\\JGarza\\GitHub\\eggcellent_data_analysis'
In [3]:
# Node labels; every link below refers to a node by its position in this list
node_labels = [
    "🐔 Chicken",
    "🥚 Eggs",
    "🍳🍗 Market/Food",
    "🐣 Hatch",
    "🐔 Chickens",
]

# One row per link: (source node index, target node index, flow value)
flows = [
    (0, 1, 100),  # Chicken  -> Eggs
    (1, 2,  50),  # Eggs     -> Market/Food
    (1, 3,  50),  # Eggs     -> Hatch
    (3, 4,  50),  # Hatch    -> Chickens
    (4, 2,  50),  # Chickens -> Market/Food
    (4, 0,  50),  # Chickens -> Chicken
]

# Split the rows into the three parallel lists the Sankey trace expects
source, target, values = (list(column) for column in zip(*flows))

# Assemble the trace from its node and link descriptions
sankey_nodes = dict(
    label=node_labels,
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
)
sankey_links = dict(source=source, target=target, value=values)
fig = go.Figure(data=[go.Sankey(node=sankey_nodes, link=sankey_links)])

# Title, font size and dark theme, then render
fig.update_layout(
    title_text="Chicken and Egg Sankey Diagram",
    font_size=12,
    template="plotly_dark",
)
fig.show()
In [4]:
# Three-letter month abbreviations mapped to their calendar month number.
# Insertion order matters: getMonthNum returns the first abbreviation it finds.
months = {
    'JAN': 1,
    'FEB': 2,
    'MAR': 3,
    'APR': 4,
    'MAY': 5,
    'JUN': 6,
    'JUL': 7,
    'AUG': 8,
    'SEP': 9,
    'OCT': 10,
    'NOV': 11,
    'DEC': 12
}

def getMonthNum(mon:str):
    """Return the month number of the first month abbreviation found in ``mon``.

    The lookup is a case-insensitive substring search, so an exact code
    ('JAN'), a lower-case code ('jan') and a longer phrase ('FIRST OF DEC')
    all resolve.

    Parameters
    ----------
    mon : str
        Period label to inspect.

    Returns
    -------
    int or None
        1-12 for the first abbreviation (checked in JAN..DEC order) contained
        in ``mon``; None when nothing matches or when ``mon`` is not a string
        (for example a NaN coming from an empty CSV cell).
    """
    if not isinstance(mon, str):
        return None

    # Normalize the *input*: the dictionary keys are already upper-case, so the
    # comparison is only case-insensitive if the label is upper-cased too.
    label = mon.upper()

    # NOTE(review): plain substring matching also hits non-month labels such as
    # 'MARKETING YEAR' (contains 'MAR') -- filter such rows out before calling.
    for abbr, number in months.items():
        if abbr in label:
            return number
    return None

# Quick check of the lookup: two exact codes and one phrase that contains a code
for sample_period in ('JAN', 'JUN', 'FIRST OF DEC'):
    print(getMonthNum(sample_period))
1
6
12

Sources¶

  • QuickStats NASS
    • for EggLayers and EggPrices
  • macroTrends
    • for US Population
In [5]:
# EggLayers: load the CSV, stamp each row with the first day of its month,
# strip thousands separators from Value and rescale it to billions.
el = (
    pd.read_csv(os.path.join(DataDIR, 'EggLayers.csv'))
    .loc[:, ['Year', 'Period', 'Value']]
    .assign(
        Month=lambda layers: layers['Period'].apply(getMonthNum),
        Day=1,
        # to_datetime assembles a timestamp from the Year/Month/Day columns
        date=lambda layers: pd.to_datetime(layers[['Year', 'Month', 'Day']]),
        Value=lambda layers: (
            pd.to_numeric(layers['Value'].str.replace(',', '', regex=True))
            / 1_000_000_000
        ),
    )
    .loc[:, ['date', 'Value']]
    .sort_values(by='date', ascending=True)
)

# Show the earliest and latest five rows
for preview in (el.head(5), el.tail(5)):
    display(preview)
date Value
197 2008-01-01 0.346
196 2008-02-01 0.344
200 2008-03-01 0.343
193 2008-04-01 0.342
201 2008-05-01 0.341
date Value
12 2024-09-01 0.372
11 2024-10-01 0.377
10 2024-11-01 0.376
3 2024-12-01 0.376
0 2025-01-01 0.369
In [6]:
# Dollars per Dozen: load the CSV and stamp each row with the first day of
# its month; Value is kept exactly as read from the file.
dd = (
    pd.read_csv(os.path.join(DataDIR, 'DollarsPerDozen.csv'))
    .assign(
        Month=lambda prices: prices['Period'].apply(getMonthNum),
        Day=1,
        # to_datetime assembles a timestamp from the Year/Month/Day columns
        date=lambda prices: pd.to_datetime(prices[['Year', 'Month', 'Day']]),
    )
    .loc[:, ['date', 'Value']]
    .sort_values(by='date', ascending=True)
)

# Show the earliest and latest five rows
for preview in (dd.head(5), dd.tail(5)):
    display(preview)
date Value
293 2000-12-01 0.674
285 2001-01-01 0.507
284 2001-02-01 0.501
288 2001-03-01 0.517
281 2001-04-01 0.484
date Value
1 2024-08-01 3.300
11 2024-09-01 1.960
10 2024-10-01 2.720
9 2024-11-01 3.220
2 2024-12-01 4.610
In [7]:
# US Population: one row per year, dated 1 January of that year; Population
# has its thousands separators stripped and is rescaled to billions.
up = (
    pd.read_csv(os.path.join(DataDIR, 'USPop.csv'))
    .assign(Month=1, Day=1)
    # to_datetime assembles a timestamp from the Year/Month/Day columns
    .assign(date=lambda pop: pd.to_datetime(pop[['Year', 'Month', 'Day']]))
    .loc[:, ['date', 'Population', 'Growth Rate']]
    .assign(
        Population=lambda pop: (
            pd.to_numeric(pop['Population'].str.replace(',', '', regex=True))
            / 1_000_000_000
        )
    )
    .sort_values(by='date', ascending=True)
)

# Show the earliest and latest five rows
for preview in (up.head(5), up.tail(5)):
    display(preview)
date Population Growth Rate
17 2008-01-01 0.306 0.97%
16 2009-01-01 0.309 0.92%
15 2010-01-01 0.311 0.87%
14 2011-01-01 0.314 0.87%
13 2012-01-01 0.317 0.88%
date Population Growth Rate
4 2021-01-01 0.337 0.31%
3 2022-01-01 0.338 0.38%
2 2023-01-01 0.340 0.50%
1 2024-01-01 0.342 0.53%
0 2025-01-01 0.344 0.52%
In [8]:
# Overlay the three series on a shared date axis, each with its own y-axis,
# because their magnitudes differ too much to share one scale.
fig = go.Figure()

# First trace (primary y-axis, left)
fig.add_trace(go.Scatter(
    x=el['date'],
    y=el['Value'],
    mode='lines',
    name='EggLayers',
    yaxis="y"
    ))

# Second trace (second y-axis, right edge of the plot area)
fig.add_trace(go.Scatter(
    x=dd['date'],
    y=dd['Value'],
    mode='lines',
    name='Dollars Per Dozen',
    yaxis="y2"
    ))

# Third trace (third y-axis, floating further right)
fig.add_trace(go.Scatter(
    x=up['date'],
    y=up['Population'],
    mode='lines',
    name='US Pop.',
    yaxis="y3"
    ))

# Layout configuration
fig.update_layout(
    title="EggLayers x Dollars Per Dozen x US Population",

    # Shrink the plot area to 90% of the width so the free-floating third
    # axis has room to the right of the second one.
    xaxis=dict(title="Date", domain=[0, 0.9]),

    # First y-axis (left side); el['Value'] was divided by 1e9 when loaded
    yaxis=dict(title="EggLayers (billions)"),

    # Second y-axis (right side, attached to the end of the x domain)
    yaxis2=dict(title="Dollars Per Dozen",
                overlaying="y", side="right", anchor="x"),

    # Third y-axis (right side, shifted). `position` only takes effect when
    # anchor="free"; with anchor="x" this axis is drawn on top of yaxis2.
    # up['Population'] was divided by 1e9 when loaded.
    yaxis3=dict(title="US Pop. (billions)",
                overlaying="y", side="right", anchor="free", position=1),

    # Horizontal legend above the plot keeps the right margin free for the
    # two right-hand axes.
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),

    template="plotly_dark",
    height=750,
)

# Show the plot
fig.show()

Here are the major instances when chickens were culled in the United States to prevent the spread of bird flu (avian influenza), along with specific time frames:

Major Bird Flu Culling Events in the U.S.¶

  1. 2014–2015 (December 2014 – June 2015) – One of the largest outbreaks in U.S. history, involving H5N2 and H5N8, resulted in the culling of over 50 million birds, particularly in Iowa and Minnesota.
  2. 2022 (February–December) – A highly pathogenic H5N1 strain spread rapidly, causing the culling of over 50 million birds across multiple states, including Iowa, Nebraska, and Colorado.
  3. 2023 (January–December) – The outbreak continued into 2023, with additional cullings due to the persistent spread of H5N1.
  4. 2024 (Ongoing) – Outbreaks of bird flu have continued in poultry farms, leading to periodic cullings of infected flocks to prevent further spread.